Model Card

Review Rating Model

Authors

Tathagata Talukdar, Kartik Gawande

Published

November 24, 2024

Model Description

This model card provides information about the Review Rating Prediction model, which predicts product review ratings on a scale of 1-5 stars based on review text.

Code
import os
import mlflow
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
import json
import plotly.io as pio
pio.renderers.default = "notebook"

# Point MLflow at the project-local tracking store (<project root>/mlruns).
# NOTE(review): assumes this notebook executes two directories below the
# project root — confirm if the notebook is moved.
project_root = os.path.dirname(os.path.dirname(os.getcwd()))
mlflow.set_tracking_uri(os.path.join(project_root, 'mlruns'))

# Helper functions for MLflow dataset handling
def get_dataset_by_context(inputs, context):
    """Return the first dataset whose 'mlflow.data.context' tag equals *context*.

    Args:
        inputs: iterable of MLflow dataset inputs, each with ``tags`` and
            ``dataset`` attributes.
        context: context value to match (e.g. 'training', 'validation').

    Returns:
        The matching dataset, or None when no input carries the tag.
    """
    matches = (
        item.dataset
        for item in inputs
        for tag in item.tags
        if tag.key == 'mlflow.data.context' and tag.value == context
    )
    return next(matches, None)

def get_schema_info(dataset):
    """Return the column specs parsed from the dataset's JSON schema.

    Args:
        dataset: MLflow dataset object with a JSON-string ``schema``
            attribute, or None.

    Returns:
        The 'mlflow_colspec' list from the schema, or [] when the dataset
        or its schema is missing.
    """
    if not (dataset and dataset.schema):
        return []
    return json.loads(dataset.schema).get('mlflow_colspec', [])

def get_row_count(dataset):
    """Return the row count recorded in the dataset's JSON profile.

    Args:
        dataset: MLflow dataset object with a JSON-string ``profile``
            attribute, or None.

    Returns:
        The 'num_rows' value from the profile, or 0 when the dataset or
        its profile is missing.
    """
    if not (dataset and dataset.profile):
        return 0
    return json.loads(dataset.profile).get('num_rows', 0)

# Get model and run information.
MODEL_NAME = "review_rating_model"
client = mlflow.tracking.MlflowClient()
# Version 9 is pinned explicitly (rather than resolved via
# client.get_latest_versions) so the card always documents the exact
# registered version it was rendered against.
latest_version = client.get_model_version(MODEL_NAME, 9)
run = mlflow.get_run(latest_version.run_id)


# Pull the datasets logged against the run, keyed by their context tag.
validation_data = get_dataset_by_context(run.inputs.dataset_inputs, 'validation')
training_data = get_dataset_by_context(run.inputs.dataset_inputs, 'training')
raw_data = get_dataset_by_context(run.inputs.dataset_inputs, 'raw_data')

# Display basic model info; start_time is epoch milliseconds, hence /1000.
print(f"Model Version: {latest_version.version}")
print(f"Last Updated: {datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')}")
Model Version: 9
Last Updated: 2024-11-21 00:58:51

Model Architecture

  • Type: RoBERTa-based Sequence Classification
  • Base Model: RoBERTa Base
  • Task: 5-class classification for review rating prediction
  • Output: Rating prediction (1-5 stars)

Dataset Overview

Code
# Dataset statistics for this run. The counts are the recorded split sizes;
# NOTE(review): they are hard-coded here rather than read back through
# get_row_count() — confirm they still match the logged dataset profiles.
print("Dataset Statistics:")
print(f"Training samples: {454764:,}")
print(f"Validation samples: {113690:,}")
print(f"Raw data samples: {568454:,}")

# Schema of the raw dataset, when it was logged with the run.
if raw_data:
    print("\nFeature Information:")
    for column in get_schema_info(raw_data):
        print(f"- {column['name']} ({column['type']})")
Dataset Statistics:
Training samples: 454,764
Validation samples: 113,690
Raw data samples: 568,454

Model Performance

Code
# Fetch the step-wise histories for the metrics logged during training.
metrics_to_plot = ['train_loss', 'eval_loss', 'learning_rate', 'grad_norm']
histories = {
    metric: client.get_metric_history(run.info.run_id, metric)
    for metric in metrics_to_plot
}

# BUG FIX: `steps` and `values` were previously never defined (NameError at
# the go.Scatter call). Derive them from the eval_loss history entries.
eval_history = histories['eval_loss']
steps = [entry.step for entry in eval_history]
values = [entry.value for entry in eval_history]

# Create evaluation loss plot (a single figure; the original built and
# discarded an extra empty go.Figure()).
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=steps,
    y=values,
    name='Evaluation Loss',
    mode='lines+markers'
))

fig.update_layout(
    title='Evaluation Loss During Training',
    xaxis_title='Step',
    yaxis_title='Loss',
    hovermode='x unified'
)
fig.show()


def _fmt_metric(value, spec):
    """Format a numeric metric with *spec*; pass non-numeric fallbacks through.

    Guards against applying a float format spec to the 'N/A' fallback
    string, which would raise instead of printing 'N/A'.
    """
    return format(value, spec) if isinstance(value, (int, float)) else 'N/A'


# Display final (last-logged) metric values for the run.
final_metrics = run.data.metrics
print("\nFinal Metrics:")
print(f"Training Loss: {_fmt_metric(final_metrics.get('train_loss', 'N/A'), '.4f')}")
print(f"Evaluation Loss: {_fmt_metric(final_metrics.get('eval_loss', 'N/A'), '.4f')}")
print(f"Training Runtime: {_fmt_metric(final_metrics.get('train_runtime', 'N/A'), '.2f')}s")
print(f"Samples/second: {_fmt_metric(final_metrics.get('train_samples_per_second', 'N/A'), '.2f')}")

Final Metrics:
Training Loss: 0.6280
Evaluation Loss: 0.5381
Training Runtime: 23582.44s
Samples/second: 3.39

Training Configuration

Code
params = run.data.params
final_metrics = run.data.metrics

# (label shown on the card, MLflow parameter key), in display order.
_PARAM_SPEC = (
    ('Epochs', 'num_train_epochs'),
    ('Train Batch Size', 'per_device_train_batch_size'),
    ('Eval Batch Size', 'per_device_eval_batch_size'),
    ('Learning Rate', 'learning_rate'),
    ('Weight Decay', 'weight_decay'),
    ('Warmup Ratio', 'warmup_ratio'),
    ('Gradient Accumulation', 'gradient_accumulation_steps'),
    ('Max Gradient Norm', 'max_grad_norm'),
    ('Max Steps', 'max_steps'),
    ('FP16', 'fp16'),
    ('Eval Strategy', 'evaluation_strategy'),
    ('Eval Steps', 'eval_steps'),
    ('Save Steps', 'save_steps'),
    ('Dataloader Workers', 'dataloader_num_workers'),
)

# Dict preserves insertion order, so the card prints in _PARAM_SPEC order.
training_params = {label: params.get(key, 'N/A') for label, key in _PARAM_SPEC}

print("Training Configuration:")
for param, value in training_params.items():
    print(f"{param}: {value}")
Training Configuration:
Epochs: 1
Train Batch Size: 16
Eval Batch Size: 16
Learning Rate: 2e-05
Weight Decay: 0.01
Warmup Ratio: 0.1
Gradient Accumulation: 2
Max Gradient Norm: 1.0
Max Steps: 2500
FP16: True
Eval Strategy: steps
Eval Steps: 100
Save Steps: 100
Dataloader Workers: 2

Model Usage Guidelines

Intended Uses

  • Automated prediction of product review ratings
  • Bulk processing of historical reviews
  • Quality control for review submissions

Out-of-Scope Uses

  • Sentiment analysis of non-product-related text
  • Analysis of non-English reviews
  • Real-time/streaming predictions

Limitations and Biases

Technical Limitations

  • Maximum sequence length: 512 tokens
  • English-language only
  • Limited to product review domain
  • Batch processing only (not optimized for real-time)

Known Biases

  • Training data class imbalance (see distribution plot)
  • Domain-specific vocabulary
  • May perform differently across product categories

Model Maintenance

Monitoring Requirements

  1. Data drift in review patterns
  2. Performance across rating classes
  3. Class distribution changes
  4. Coverage of product categories

Retraining Triggers

  • Performance below 0.8 F1-score per class
  • Significant data drift detected
  • Major changes in review patterns
  • 6-month deployment period

Model Artifacts and Governance

Code
# Governance/provenance details: where the registered version's model files
# live and which MLflow run produced them.
print("Model Information:")
print(f"Model Location: {latest_version.source}")
print(f"Run ID: {run.info.run_id}")
print(f"Artifact URI: {run.info.artifact_uri}")
Model Information:
Model Location: file:///home/tathagat/workspace/projects/MLPE/tathagata-ai-839/review-rating/mlruns/863057453145536184/27e869ed62a3496fa284da0d956ee9e6/artifacts/model
Run ID: 27e869ed62a3496fa284da0d956ee9e6
Artifact URI: file:///home/tathagat/workspace/projects/MLPE/tathagata-ai-839/review-rating/mlruns/863057453145536184/27e869ed62a3496fa284da0d956ee9e6/artifacts